package com.ruoyi.system.utils;

import com.ruoyi.system.pojo.CjCjmx;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;


import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes announcement entries from the ccgp-shandong.gov.cn list page and fills
 * {@link CjCjmx} records from the linked detail pages.
 */
@Component
public class HtmlParseUtil {
    /** List page that is scanned for links whose text matches the keyword. */
    private static final String LIST_URL = "http://www.ccgp-shandong.gov.cn/sdgp2017/site/listnew.jsp";

    /** Connect/read timeout, in milliseconds, for every page fetch. */
    private static final int TIMEOUT_MILLIS = 3000;

    /**
     * Manual entry point kept for ad-hoc runs; intentionally does nothing.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        // Intentionally empty.
    }

    /**
     * Collects every link on the list page whose text contains {@code keywords},
     * then visits each linked detail page to fill in title, publish time,
     * publishing organization and article content.
     *
     * <p>Detail pages are processed on a best-effort basis: if one cannot be
     * fetched, or lacks an expected element, the entry is still returned with
     * whatever fields could be populated (at minimum link text, href and task id).
     *
     * @param keywords text a link title must contain; must not be {@code null}
     * @param rwid     task id stamped on every returned record
     * @return the matching records, in list-page order; empty if nothing matched
     * @throws Exception if the list page itself cannot be fetched or parsed
     */
    public List<CjCjmx> paseShanDong(String keywords, int rwid) throws Exception {
        Document listPage = Jsoup.parse(new URL(LIST_URL), TIMEOUT_MILLIS);
        List<CjCjmx> results = new ArrayList<>();

        for (Element container : listPage.getElementsByClass("news_list2")) {
            for (Element anchor : container.getElementsByTag("a")) {
                String linkText = anchor.text();
                if (!linkText.contains(keywords)) {
                    continue;
                }
                CjCjmx content = new CjCjmx();
                content.setWzbt(linkText);            // title (replaced by the detail page title when available)
                content.setUrl(anchor.attr("href"));  // address, stored exactly as it appears in the page
                // Task id is assigned up front so that entries whose detail page
                // cannot be loaded are still attributed to the task.
                content.setRwid(rwid);
                results.add(content);
            }
        }

        for (CjCjmx content : results) {
            fillFromDetailPage(content);
        }
        return results;
    }

    /**
     * Fetches the detail page referenced by {@code content} and copies its fields
     * into the record. I/O failures are reported on stderr and the record is left
     * as it was, so one unreachable page does not abort the remaining entries.
     */
    private static void fillFromDetailPage(CjCjmx content) {
        String href = content.getUrl();
        try {
            // Resolve against the list page so that root-relative, relative and
            // absolute hrefs all yield a valid URL.
            URL detailUrl = new URL(new URL(LIST_URL), href);
            Document detailPage = Jsoup.parse(detailUrl, TIMEOUT_MILLIS);
            for (Element block : detailPage.getElementsByClass("listConts")) {
                applyDetailBlock(content, block);
            }
        } catch (IOException e) {
            System.err.println("Failed to load detail page for href '" + href + "'; keeping list-page data only");
            e.printStackTrace();
        }
    }

    /**
     * Copies the fields found in one {@code listConts} block into {@code content}.
     * Each field is set only when its source element exists, so an unexpected
     * page layout leaves the field untouched instead of throwing.
     */
    private static void applyDetailBlock(CjCjmx content, Element block) {
        // Title
        Elements titles = block.getElementsByClass("title");
        if (!titles.isEmpty()) {
            content.setWzbt(titles.get(0).text());
        }

        // The first "midea" element carries the publish time, the second the
        // publishing organization.
        Elements metaItems = block.getElementsByTag("midea");
        if (metaItems.size() > 0) {
            content.setFbsj(metaItems.get(0).text());
        }
        if (metaItems.size() > 1) {
            content.setFbjg(metaItems.get(1).text());
        }

        // Article content is taken from the second <p>; require that it exists
        // (a non-empty check alone is not enough for index 1).
        Elements paragraphs = block.getElementsByTag("p");
        if (paragraphs.size() > 1) {
            content.setWznr(paragraphs.get(1).text());
        }
    }
}
